Methodologies for Spatially Gridding Climate Adaptation Options from Econometric Methods

Author

Maxwell Mkondiwa

1 Introduction

2 Imprtant geospatial R packages: Terra, geodata,sf, sp

library(geodata)

India=gadm(country="IND", level=2, path=tempdir())
plot(India)

India_aoi=subset(India,India$NAME_1=="Bihar"|India$NAME_2%in%c("Ballia","Chandauli","Deoria","Ghazipur","Kushinagar","Maharajganj","Mau","Siddharth Nagar","Gorakhpur"))

plot(India_aoi)

plot(India_aoi, add=TRUE)

library(sf)

India_aoi_sf=st_as_sf(India_aoi)
library(mapview)

mapview(India_aoi_sf)
# Dissolve the district polygons to form new polygon of Bihar and EUP
library(sf)
India_aoi_sf_dis=st_union(India_aoi_sf)

3 Causal Random Forest Model to Get Individual Treatment Effects

4 Point-geocoded data

4.1 Gridded data input variables

The first strategy is to translate all variables to the grid. This involves interpolation across space and using new variable names. In this case, instead of gender being a dummy, you use a proportion of female or male after interpolation.

4.1.1 Proximity polygons

library(rio)
LDS=import("LDS_wheat_public_cleaned.csv")

library(sp)
LDS_sp=SpatialPointsDataFrame(cbind(LDS$O.largestPlotGPS.Longitude,LDS$O.largestPlotGPS.Latitude),data=LDS,proj4string=CRS("+proj=longlat +datum=WGS84"))

library(terra)
LDS_v=vect(LDS_sp)
if (!require("rspat")) remotes::install_github('rspatial/rspat')

library(rspat)
v <- voronoi(LDS_v)
plot(v)
points(LDS_v)

v_india_aoi <- crop(v,India_aoi)
plot(v_india_aoi, "yield_kgperha")

# r <- rast(India_aoi, res=10000)
# vr <- terra::rasterize(v_india_aoi, r, "yield_kgperha")
# plot(vr)

4.1.2 Nearest-neigbor

# gs <- gstat(formula=prec~1, locations=~x+y, data=d, nmax=5, set=list(idp = 0))
# nn <- interpolate(r, gs, debug.level=0)
# nnmsk <- mask(nn, vr)
# plot(nnmsk, 1)

4.1.3 Inverse distance weighted

# library(gstat)
# gs <- gstat(formula=prec~1, locations=~x+y, data=d)
# idw <- interpolate(r, gs, debug.level=0)
# idwr <- mask(idw, vr)
# plot(idwr, 1)
# 
# 

4.2 Model based kriging

4.3 Model-based predictions

4.3.1 Random forest and raster prediction

This approach follows notes from reago website by Robert Hjimans (https://reagro.org/cases/croptrials.html).

library(rio)
LDS=import("LDS_wheat_public_cleaned.csv")
table(LDS$A.q103_district,LDS$A.q102_state)
                
                 Bihar UttarPradesh
  Arah             208            0
  Araria           117            0
  Arwal            181            0
  Aurangabad       194            0
  Balia              0          216
  Banka            176            0
  Begusarai        213            0
  Bhagalpur        207            0
  Buxar            207            0
  Chandauli          0          208
  Deoria             0          209
  EastChamparan    205            0
  Gaya             193            0
  Gazipur            0          210
  Gopalganj        210            0
  Gorakhpur          0          210
  Jehanabad        189            0
  Kaimur           204            0
  Katihar          115            0
  Khagaria         205            0
  Kushinagar         0          195
  Lakhisarai       195            0
  Madhepura        169            0
  Maharajganj        0          210
  Mau                0          204
  Munger           210            0
  Muzaffarpur      204            0
  Nalanda          196            0
  Patna            203            0
  Purnia             8            0
  Rohtas           196            0
  Saharsa          163            0
  Samastipur       202            0
  Saran            209            0
  Sheohar          209            0
  Siddharthnagar     0          193
  Siwan            198            0
  Supaul           206            0
  Vaishali         196            0
  WestChamparan    205            0
plot(LDS$O.largestPlotGPS.Longitude, LDS$O.largestPlotGPS.Latitude, col="red", pch=20)

# Random Forest Estimation 
library(randomForest)

RF_model <- randomForest(yield_kgperha ~ O.largestPlotGPS.Longitude + O.largestPlotGPS.Latitude, data=LDS)

varImpPlot(RF_model)

RF_model_pred = predict(RF_model)
plot(LDS$yield_kgperha, RF_model_pred)
abline(0,1)

# Raster prediction

## Create grid with extent
library(raster)

e <- extent(c(min(LDS$O.largestPlotGPS.Longitude)-2,max(LDS$O.largestPlotGPS.Longitude)+2,min(LDS$O.largestPlotGPS.Latitude)-2,max(LDS$O.largestPlotGPS.Latitude)+2))

aoi <- raster(ext=e, res=1/6)

pp <- interpolate(aoi, RF_model, xyNames=c('O.largestPlotGPS.Longitude', 'O.largestPlotGPS.Latitude'))
pp <- mask(pp, India_aoi_sf)
pp <- crop(pp, India_aoi_sf)
plot(pp)
points(LDS$O.largestPlotGPS.Longitude, LDS$O.largestPlotGPS.Latitude, col="blue")

4.3.2 Spatial Bayesian Geostatistical Gaussian Process Model

If one is interested in calculating other measures other than the predicted value (for example, the probability of exceeding some amount), then a Bayesian gaussian process model is the best alternative in that using Markov Chain Monte Carlo simulations one can use a probabilistic assessment.

### Bayesian kriging 

LDS_small <- LDS[sample(1:nrow(LDS),800),] 


coords=dplyr::select(LDS_small,O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude)
coords=as.matrix(coords)

# The public version of the data has duplicated coordinates
# We need to jitter these because spatial Bayesian kriging requires unique coordinates.
library(geoR)
coords=jitterDupCoords(coords,min=2,max=10)
coords=as.matrix(coords)

# Bayesian models take much time to render. We sample 1000 observations to showcase the approach
library(spBayes)
n.samples=1000

t1 <- Sys.time()

r <-1
n.ltr <- r*(r+1)/2

priors <- list("phi.Unif"=list(rep(1,r), rep(10,r)), "K.IW"=list(r, diag(rep(1,r))), "tau.sq.IG"=c(2, 1))

starting <- list("phi"=rep(3/0.5,r), "A"=rep(1,n.ltr), "tau.sq"=1)

tuning <- list("phi"=rep(0.1,r), "A"=rep(0.01, n.ltr), "tau.sq"=0.01)

cf.sowing.sp <- spBayes::spSVC(yield_kgperha~1, data=LDS_small,coords=coords,
                                  starting= starting,
                                  tuning=tuning,
                                  priors=priors,
                                  cov.model="exponential",n.samples=n.samples,
                                  n.omp.threads=15,svc.cols=c("(Intercept)"))
----------------------------------------
    General model description
----------------------------------------
Model fit with 800 observations.

Number of covariates 1.

Number of space varying covariates 1.

Using the exponential spatial correlation model.

Number of MCMC samples 1000.

Priors and hyperpriors:
    beta flat.
    K IW hyperpriors:
    df: 1.00000
    S:
    1.000   

    phi Unif lower bound hyperpriors:   1.000   
    phi Unif upper bound hyperpriors:   10.000  

    tau.sq IG hyperpriors shape=2.00000 and scale=1.00000

Source compiled with OpenMP, posterior sampling is using 15 thread(s).
-------------------------------------------------
        Sampling
-------------------------------------------------
Sampled: 100 of 1000, 10.00%
Report interval Metrop. Acceptance rate: 64.00%
Overall Metrop. Acceptance rate: 64.00%
-------------------------------------------------
Sampled: 200 of 1000, 20.00%
Report interval Metrop. Acceptance rate: 42.00%
Overall Metrop. Acceptance rate: 53.00%
-------------------------------------------------
Sampled: 300 of 1000, 30.00%
Report interval Metrop. Acceptance rate: 22.00%
Overall Metrop. Acceptance rate: 42.67%
-------------------------------------------------
Sampled: 400 of 1000, 40.00%
Report interval Metrop. Acceptance rate: 24.00%
Overall Metrop. Acceptance rate: 38.00%
-------------------------------------------------
Sampled: 500 of 1000, 50.00%
Report interval Metrop. Acceptance rate: 30.00%
Overall Metrop. Acceptance rate: 36.40%
-------------------------------------------------
Sampled: 600 of 1000, 60.00%
Report interval Metrop. Acceptance rate: 30.00%
Overall Metrop. Acceptance rate: 35.33%
-------------------------------------------------
Sampled: 700 of 1000, 70.00%
Report interval Metrop. Acceptance rate: 32.00%
Overall Metrop. Acceptance rate: 34.86%
-------------------------------------------------
Sampled: 800 of 1000, 80.00%
Report interval Metrop. Acceptance rate: 24.00%
Overall Metrop. Acceptance rate: 33.50%
-------------------------------------------------
Sampled: 900 of 1000, 90.00%
Report interval Metrop. Acceptance rate: 34.00%
Overall Metrop. Acceptance rate: 33.56%
-------------------------------------------------
Sampled: 1000 of 1000, 100.00%
Report interval Metrop. Acceptance rate: 23.00%
Overall Metrop. Acceptance rate: 32.50%
-------------------------------------------------
t2 <- Sys.time()
t2 - t1
Time difference of 1.011444 mins
burn.in <- floor(0.75*n.samples)

cf.sowing.sp.r <- spRecover(cf.sowing.sp, start=burn.in)
Source compiled with OpenMP, posterior sampling is using 1 thread(s).
-------------------------------------------------
    Recovering beta and w
-------------------------------------------------
Sampled: 99 of 251, 39.44%
Sampled: 199 of 251, 79.28%
# Kriging
library(terra)
library(stars)
library(raster)
library(gstat) # Use gstat's idw routine
library(sp)    # Used for the spsample function
library(tmap)
library(geodata)

# India=gadm(country="IND", level=1, path=tempdir())
# plot(India)
# India_State_Boundary=subset(India,India$NAME_1=="Bihar")
# plot(India_State_Boundary)
# library(sf)

#India_State_Boundary=st_as_sf(India_State_Boundary)

#India_State_Boundary_Bihar_sp=as_Spatial(India_State_Boundary_Bihar)

library(spBayes)
India_aoi_sf_dis_sp=as_Spatial(India_aoi_sf_dis)

LDS_small_sp=SpatialPointsDataFrame(cbind(LDS_small$O.largestPlotGPS.Longitude,LDS_small$O.largestPlotGPS.Latitude),data=LDS_small,proj4string=CRS("+proj=longlat +datum=WGS84"))


LDS_small_sp@bbox <- India_aoi_sf_dis_sp@bbox

grd <- as.data.frame(spsample(LDS_small_sp, "regular", n=10000))

names(grd)       <- c("X", "Y")
coordinates(grd) <- c("X", "Y")
gridded(grd)     <- TRUE  # Create SpatialPixel object
fullgrid(grd)    <- TRUE  # Create SpatialGrid object
plot(grd)

India_aoi_sf_dis_sp_poly <- India_aoi_sf_dis_sp@polygons[[1]]@Polygons[[1]]@coords
India_aoi_sf_dis_sp_poly <- as.matrix(India_aoi_sf_dis_sp_poly)

pred.coords <- SpatialPoints(grd)@coords
pred.coords =as.matrix(pred.coords)

pred.covars <- as.matrix(rep(1, nrow(pred.coords)))

cf.sowing.sp.pred <- spPredict(cf.sowing.sp.r,pred.coords=pred.coords,
                                    pred.covars=pred.covars, n.omp.threads=15)
Source compiled with OpenMP, posterior sampling is using 15 thread(s).
-------------------------------------------------
    Point-wise sampling of predicted w
-------------------------------------------------
Sampled: 99 of 251, 39.44%
Sampled: 199 of 251, 79.28%
cf.sowing.sp.pred.pred.mu = apply(cf.sowing.sp.pred$p.y.predictive.samples,1,mean)
cf.sowing.sp.pred.sd = apply(cf.sowing.sp.pred$p.y.predictive.samples,1,sd)

library(MBA)
library(fields)

pred.grid <- as.data.frame(list(pred.coords,pred.mu=cf.sowing.sp.pred.pred.mu,pred.sd=cf.sowing.sp.pred.sd))

coordinates(pred.grid) = c("X", "Y")
gridded(pred.grid) <- TRUE
pred.mu.image <- as.image.SpatialGridDataFrame(pred.grid["pred.mu"])
pred.sd.image <- as.image.SpatialGridDataFrame(pred.grid["pred.sd"])


# predict and probability ------------------------------------------------
cf.sowing.sp.pred.pred.prob_3tons=rowSums(cf.sowing.sp.pred$p.y.predictive.samples>3000)/251
cf.sowing.sp.pred.pred.prob_5tons=rowSums(cf.sowing.sp.pred$p.y.predictive.samples>5000)/251


pred.grid <- as.data.frame(list(pred.coords,pred.mu=cf.sowing.sp.pred.pred.mu,pred.sd=cf.sowing.sp.pred.sd,
                                pred.prob_3tons=cf.sowing.sp.pred.pred.prob_3tons,
                                pred.prob_5tons=cf.sowing.sp.pred.pred.prob_5tons))

coordinates(pred.grid) = c("X", "Y")
gridded(pred.grid) <- TRUE

pred.mu.image <- as.image.SpatialGridDataFrame(pred.grid["pred.mu"])
pred.sd.image <- as.image.SpatialGridDataFrame(pred.grid["pred.sd"])
pred.prob.image_3tons <- as.image.SpatialGridDataFrame(pred.grid["pred.prob_3tons"])
pred.prob.image_5tons<- as.image.SpatialGridDataFrame(pred.grid["pred.prob_5tons"])

# Rastervis
library(rasterVis)
pred.mu=pred.grid["pred.mu"]
pred.mu=raster(pred.mu)
pred.mu=mask(pred.mu,India_aoi_sf_dis_sp)
pred.mu_plot=levelplot(pred.mu,par.settings=RdBuTheme(),contour=TRUE)
pred.mu_plot

# Standard deviation
library(rasterVis)
pred.sd=pred.grid["pred.sd"]
pred.sd=raster(pred.sd)
pred.sd=mask(pred.sd,India_aoi_sf_dis_sp)
pred.sd_plot=levelplot(pred.sd,par.settings=RdBuTheme(),contour=TRUE)
pred.sd_plot

# Probability of 3 tons
pred.prob_3tons=pred.grid["pred.prob_3tons"]
pred.prob_3tons=raster(pred.prob_3tons)
pred.prob_3tons=mask(pred.prob_3tons,India_aoi_sf_dis_sp)
pred.prob_3tons_plot=levelplot(pred.prob_3tons,par.settings=RdBuTheme(),contour=TRUE)
pred.prob_3tons_plot

# Probability of 5 tons
pred.prob_5tons=pred.grid["pred.prob_5tons"]
pred.prob_5tons=raster(pred.prob_5tons)
pred.prob_5tons=mask(pred.prob_5tons,India_aoi_sf_dis_sp)
pred.prob_5tons_plot=levelplot(pred.prob_5tons,par.settings=RdBuTheme(),contour=TRUE)
pred.prob_5tons_plot

4.3.3 Spatial Bayesian Geoadditive Model

5 Areal data

5.1 Areal centroid random field gaussian process model

One can get the centroid of each polygon, e.g., then fit a geostatistical model. This can then be gridded across the area of interest.






5.2 Areal-point downscaling

5.2.1 Markov Random Field (MRF) Geoaddive Structured and Unstructured Spatial Model

6 Conclusion

7 References